/*-------------------------------------------------------------------------
 *
 * uuid.c
 *      Functions for the built-in type "uuid".
 *
 * Copyright (c) 2007-2017, PostgreSQL Global Development Group
 *
 * IDENTIFICATION
 *      src/backend/utils/adt/uuid.c
 *
 *-------------------------------------------------------------------------
 */

#include "postgres.h"

#include "access/hash.h"
#include "lib/hyperloglog.h"
#include "libpq/pqformat.h"
#include "port/pg_bswap.h"
#include "utils/builtins.h"
#include "utils/guc.h"
#include "utils/sortsupport.h"
#include "utils/uuid.h"

/* sortsupport for uuid */
typedef struct
{
    int64        input_count;    /* number of non-null values seen */
    bool        estimating;        /* true if estimating cardinality */

    hyperLogLogState abbr_card; /* cardinality estimator */
} uuid_sortsupport_state;

static void string_to_uuid(const char *source, pg_uuid_t *uuid);
static int    uuid_internal_cmp(const pg_uuid_t *arg1, const pg_uuid_t *arg2);
static int    uuid_fast_cmp(Datum x, Datum y, SortSupport ssup);
static int    uuid_cmp_abbrev(Datum x, Datum y, SortSupport ssup);
static bool uuid_abbrev_abort(int memtupcount, SortSupport ssup);
static Datum uuid_abbrev_convert(Datum original, SortSupport ssup);

Datum
uuid_in(PG_FUNCTION_ARGS)
{
    char       *uuid_str = PG_GETARG_CSTRING(0);
    pg_uuid_t  *uuid;

    uuid = (pg_uuid_t *) palloc(sizeof(*uuid));
    string_to_uuid(uuid_str, uuid);
    PG_RETURN_UUID_P(uuid);
}

Datum
uuid_out(PG_FUNCTION_ARGS)
{
    pg_uuid_t  *uuid = PG_GETARG_UUID_P(0);
    static const char hex_chars[] = "0123456789abcdef";
    StringInfoData buf;
    int            i;

    initStringInfo(&buf);
    for (i = 0; i < UUID_LEN; i++)
    {
        int            hi;
        int            lo;

        /*
         * We print uuid values as a string of 8, 4, 4, 4, and then 12
         * hexadecimal characters, with each group is separated by a hyphen
         * ("-"). Therefore, add the hyphens at the appropriate places here.
         */
        if (i == 4 || i == 6 || i == 8 || i == 10)
            appendStringInfoChar(&buf, '-');

        hi = uuid->data[i] >> 4;
        lo = uuid->data[i] & 0x0F;

        appendStringInfoChar(&buf, hex_chars[hi]);
        appendStringInfoChar(&buf, hex_chars[lo]);
    }

    PG_RETURN_CSTRING(buf.data);
}

/*
 * We allow UUIDs as a series of 32 hexadecimal digits with an optional dash
 * after each group of 4 hexadecimal digits, and optionally surrounded by {}.
 * (The canonical format 8x-4x-4x-4x-12x, where "nx" means n hexadecimal
 * digits, is the only one used for output.)
 */
static void
string_to_uuid(const char *source, pg_uuid_t *uuid)
{// #lizard forgives
    const char *src = source;
    bool        braces = false;
    int            i;

    if (src[0] == '{')
    {
        src++;
        braces = true;
    }

    for (i = 0; i < UUID_LEN; i++)
    {
        char        str_buf[3];

        if (src[0] == '\0' || src[1] == '\0')
            goto syntax_error;
        memcpy(str_buf, src, 2);
        if (!isxdigit((unsigned char) str_buf[0]) ||
            !isxdigit((unsigned char) str_buf[1]))
            goto syntax_error;

        str_buf[2] = '\0';
        uuid->data[i] = (unsigned char) strtoul(str_buf, NULL, 16);
        src += 2;
        if (src[0] == '-' && (i % 2) == 1 && i < UUID_LEN - 1)
            src++;
    }

    if (braces)
    {
        if (*src != '}')
            goto syntax_error;
        src++;
    }

    if (*src != '\0')
        goto syntax_error;

    return;

syntax_error:
    ereport(ERROR,
            (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
             errmsg("invalid input syntax for type %s: \"%s\"",
                    "uuid", source)));
}

Datum
uuid_recv(PG_FUNCTION_ARGS)
{
    StringInfo    buffer = (StringInfo) PG_GETARG_POINTER(0);
    pg_uuid_t  *uuid;

    uuid = (pg_uuid_t *) palloc(UUID_LEN);
    memcpy(uuid->data, pq_getmsgbytes(buffer, UUID_LEN), UUID_LEN);
    PG_RETURN_POINTER(uuid);
}

Datum
uuid_send(PG_FUNCTION_ARGS)
{
    pg_uuid_t  *uuid = PG_GETARG_UUID_P(0);
    StringInfoData buffer;

    pq_begintypsend(&buffer);
    pq_sendbytes(&buffer, (char *) uuid->data, UUID_LEN);
    PG_RETURN_BYTEA_P(pq_endtypsend(&buffer));
}

/* internal uuid compare function */
static int
uuid_internal_cmp(const pg_uuid_t *arg1, const pg_uuid_t *arg2)
{
    return memcmp(arg1->data, arg2->data, UUID_LEN);
}

Datum
uuid_lt(PG_FUNCTION_ARGS)
{
    pg_uuid_t  *arg1 = PG_GETARG_UUID_P(0);
    pg_uuid_t  *arg2 = PG_GETARG_UUID_P(1);

    PG_RETURN_BOOL(uuid_internal_cmp(arg1, arg2) < 0);
}

Datum
uuid_le(PG_FUNCTION_ARGS)
{
    pg_uuid_t  *arg1 = PG_GETARG_UUID_P(0);
    pg_uuid_t  *arg2 = PG_GETARG_UUID_P(1);

    PG_RETURN_BOOL(uuid_internal_cmp(arg1, arg2) <= 0);
}

Datum
uuid_eq(PG_FUNCTION_ARGS)
{
    pg_uuid_t  *arg1 = PG_GETARG_UUID_P(0);
    pg_uuid_t  *arg2 = PG_GETARG_UUID_P(1);

    PG_RETURN_BOOL(uuid_internal_cmp(arg1, arg2) == 0);
}

Datum
uuid_ge(PG_FUNCTION_ARGS)
{
    pg_uuid_t  *arg1 = PG_GETARG_UUID_P(0);
    pg_uuid_t  *arg2 = PG_GETARG_UUID_P(1);

    PG_RETURN_BOOL(uuid_internal_cmp(arg1, arg2) >= 0);
}

Datum
uuid_gt(PG_FUNCTION_ARGS)
{
    pg_uuid_t  *arg1 = PG_GETARG_UUID_P(0);
    pg_uuid_t  *arg2 = PG_GETARG_UUID_P(1);

    PG_RETURN_BOOL(uuid_internal_cmp(arg1, arg2) > 0);
}

Datum
uuid_ne(PG_FUNCTION_ARGS)
{
    pg_uuid_t  *arg1 = PG_GETARG_UUID_P(0);
    pg_uuid_t  *arg2 = PG_GETARG_UUID_P(1);

    PG_RETURN_BOOL(uuid_internal_cmp(arg1, arg2) != 0);
}

/* handler for btree index operator */
Datum
uuid_cmp(PG_FUNCTION_ARGS)
{
    pg_uuid_t  *arg1 = PG_GETARG_UUID_P(0);
    pg_uuid_t  *arg2 = PG_GETARG_UUID_P(1);

    PG_RETURN_INT32(uuid_internal_cmp(arg1, arg2));
}

/*
 * Sort support strategy routine
 */
Datum
uuid_sortsupport(PG_FUNCTION_ARGS)
{
    SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0);

    ssup->comparator = uuid_fast_cmp;
    ssup->ssup_extra = NULL;

    if (ssup->abbreviate)
    {
        uuid_sortsupport_state *uss;
        MemoryContext oldcontext;

        oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt);

        uss = palloc(sizeof(uuid_sortsupport_state));
        uss->input_count = 0;
        uss->estimating = true;
        initHyperLogLog(&uss->abbr_card, 10);

        ssup->ssup_extra = uss;

        ssup->comparator = uuid_cmp_abbrev;
        ssup->abbrev_converter = uuid_abbrev_convert;
        ssup->abbrev_abort = uuid_abbrev_abort;
        ssup->abbrev_full_comparator = uuid_fast_cmp;

        MemoryContextSwitchTo(oldcontext);
    }

    PG_RETURN_VOID();
}

/*
 * SortSupport comparison func
 */
static int
uuid_fast_cmp(Datum x, Datum y, SortSupport ssup)
{
    pg_uuid_t  *arg1 = DatumGetUUIDP(x);
    pg_uuid_t  *arg2 = DatumGetUUIDP(y);

    return uuid_internal_cmp(arg1, arg2);
}

/*
 * Abbreviated key comparison func
 */
static int
uuid_cmp_abbrev(Datum x, Datum y, SortSupport ssup)
{
    if (x > y)
        return 1;
    else if (x == y)
        return 0;
    else
        return -1;
}

/*
 * Callback for estimating effectiveness of abbreviated key optimization.
 *
 * We pay no attention to the cardinality of the non-abbreviated data, because
 * there is no equality fast-path within authoritative uuid comparator.
 */
static bool
uuid_abbrev_abort(int memtupcount, SortSupport ssup)
{// #lizard forgives
    uuid_sortsupport_state *uss = ssup->ssup_extra;
    double        abbr_card;

    if (memtupcount < 10000 || uss->input_count < 10000 || !uss->estimating)
        return false;

    abbr_card = estimateHyperLogLog(&uss->abbr_card);

    /*
     * If we have >100k distinct values, then even if we were sorting many
     * billion rows we'd likely still break even, and the penalty of undoing
     * that many rows of abbrevs would probably not be worth it.  Stop even
     * counting at that point.
     */
    if (abbr_card > 100000.0)
    {
#ifdef TRACE_SORT
        if (trace_sort)
            elog(LOG,
                 "uuid_abbrev: estimation ends at cardinality %f"
                 " after " INT64_FORMAT " values (%d rows)",
                 abbr_card, uss->input_count, memtupcount);
#endif
        uss->estimating = false;
        return false;
    }

    /*
     * Target minimum cardinality is 1 per ~2k of non-null inputs.  0.5 row
     * fudge factor allows us to abort earlier on genuinely pathological data
     * where we've had exactly one abbreviated value in the first 2k
     * (non-null) rows.
     */
    if (abbr_card < uss->input_count / 2000.0 + 0.5)
    {
#ifdef TRACE_SORT
        if (trace_sort)
            elog(LOG,
                 "uuid_abbrev: aborting abbreviation at cardinality %f"
                 " below threshold %f after " INT64_FORMAT " values (%d rows)",
                 abbr_card, uss->input_count / 2000.0 + 0.5, uss->input_count,
                 memtupcount);
#endif
        return true;
    }

#ifdef TRACE_SORT
    if (trace_sort)
        elog(LOG,
             "uuid_abbrev: cardinality %f after " INT64_FORMAT
             " values (%d rows)", abbr_card, uss->input_count, memtupcount);
#endif

    return false;
}

/*
 * Conversion routine for sortsupport.  Converts original uuid representation
 * to abbreviated key representation.  Our encoding strategy is simple -- pack
 * the first `sizeof(Datum)` bytes of uuid data into a Datum (on little-endian
 * machines, the bytes are stored in reverse order), and treat it as an
 * unsigned integer.
 */
static Datum
uuid_abbrev_convert(Datum original, SortSupport ssup)
{
    uuid_sortsupport_state *uss = ssup->ssup_extra;
    pg_uuid_t  *authoritative = DatumGetUUIDP(original);
    Datum        res;

    memcpy(&res, authoritative->data, sizeof(Datum));
    uss->input_count += 1;

    if (uss->estimating)
    {
        uint32        tmp;

#if SIZEOF_DATUM == 8
        tmp = (uint32) res ^ (uint32) ((uint64) res >> 32);
#else                            /* SIZEOF_DATUM != 8 */
        tmp = (uint32) res;
#endif

        addHyperLogLog(&uss->abbr_card, DatumGetUInt32(hash_uint32(tmp)));
    }

    /*
     * Byteswap on little-endian machines.
     *
     * This is needed so that uuid_cmp_abbrev() (an unsigned integer 3-way
     * comparator) works correctly on all platforms.  If we didn't do this,
     * the comparator would have to call memcmp() with a pair of pointers to
     * the first byte of each abbreviated key, which is slower.
     */
    res = DatumBigEndianToNative(res);

    return res;
}

/* hash index support */
Datum
uuid_hash(PG_FUNCTION_ARGS)
{
    pg_uuid_t  *key = PG_GETARG_UUID_P(0);

    return hash_any(key->data, UUID_LEN);
}

Datum
uuid_hash_extended(PG_FUNCTION_ARGS)
{
	pg_uuid_t  *key = PG_GETARG_UUID_P(0);

	return hash_any_extended(key->data, UUID_LEN, PG_GETARG_INT64(1));
}
